import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import cell2location
from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs
Global seed set to 0 /home/conda/envs/scrna/lib/python3.9/site-packages/pytorch_lightning/utilities/warnings.py:53: LightningDeprecationWarning: pytorch_lightning.utilities.warnings.rank_zero_deprecation has been deprecated in v1.6 and will be removed in v1.8. Use the equivalent function from the pytorch_lightning.utilities.rank_zero module instead. new_rank_zero_deprecation( /home/conda/envs/scrna/lib/python3.9/site-packages/pytorch_lightning/utilities/warnings.py:58: LightningDeprecationWarning: The `pytorch_lightning.loggers.base.rank_zero_experiment` is deprecated in v1.7 and will be removed in v1.9. Please use `pytorch_lightning.loggers.logger.rank_zero_experiment` instead. return new_rank_zero_deprecation(*args, **kwargs)
results_folder = './results/'
# create paths and names to results folders for reference regression and cell2location models
run_name = f'{results_folder}/cell2location_map'
adata_file = f"{run_name}/sp.h5ad"
adata_vis = sc.read_h5ad(adata_file)
adata_vis
AnnData object with n_obs × n_vars = 27912 × 10533
obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'donor_id', 'region', '_indices', '_scvi_batch', '_scvi_labels'
var: 'feature_types', 'genome', 'SYMBOL', 'mt', 'n_cells_by_counts-6800STDY12499406', 'mean_counts-6800STDY12499406', 'log1p_mean_counts-6800STDY12499406', 'pct_dropout_by_counts-6800STDY12499406', 'total_counts-6800STDY12499406', 'log1p_total_counts-6800STDY12499406', 'n_cells_by_counts-6800STDY12499407', 'mean_counts-6800STDY12499407', 'log1p_mean_counts-6800STDY12499407', 'pct_dropout_by_counts-6800STDY12499407', 'total_counts-6800STDY12499407', 'log1p_total_counts-6800STDY12499407', 'n_cells_by_counts-6800STDY12499408', 'mean_counts-6800STDY12499408', 'log1p_mean_counts-6800STDY12499408', 'pct_dropout_by_counts-6800STDY12499408', 'total_counts-6800STDY12499408', 'log1p_total_counts-6800STDY12499408', 'n_cells_by_counts-6800STDY12499409', 'mean_counts-6800STDY12499409', 'log1p_mean_counts-6800STDY12499409', 'pct_dropout_by_counts-6800STDY12499409', 'total_counts-6800STDY12499409', 'log1p_total_counts-6800STDY12499409', 'n_cells_by_counts-6800STDY12499410', 'mean_counts-6800STDY12499410', 'log1p_mean_counts-6800STDY12499410', 'pct_dropout_by_counts-6800STDY12499410', 'total_counts-6800STDY12499410', 'log1p_total_counts-6800STDY12499410', 'n_cells_by_counts-6800STDY12499411', 'mean_counts-6800STDY12499411', 'log1p_mean_counts-6800STDY12499411', 'pct_dropout_by_counts-6800STDY12499411', 'total_counts-6800STDY12499411', 'log1p_total_counts-6800STDY12499411', 'n_cells_by_counts-6800STDY12499412', 'mean_counts-6800STDY12499412', 'log1p_mean_counts-6800STDY12499412', 'pct_dropout_by_counts-6800STDY12499412', 'total_counts-6800STDY12499412', 'log1p_total_counts-6800STDY12499412', 'n_cells_by_counts-6800STDY12499413', 'mean_counts-6800STDY12499413', 'log1p_mean_counts-6800STDY12499413', 'pct_dropout_by_counts-6800STDY12499413', 'total_counts-6800STDY12499413', 'log1p_total_counts-6800STDY12499413', 'n_cells_by_counts-6800STDY12499502', 'mean_counts-6800STDY12499502', 'log1p_mean_counts-6800STDY12499502', 'pct_dropout_by_counts-6800STDY12499502', 'total_counts-6800STDY12499502', 'log1p_total_counts-6800STDY12499502', 'n_cells_by_counts-6800STDY12499503', 'mean_counts-6800STDY12499503', 'log1p_mean_counts-6800STDY12499503', 'pct_dropout_by_counts-6800STDY12499503', 'total_counts-6800STDY12499503', 'log1p_total_counts-6800STDY12499503', 'n_cells_by_counts-6800STDY12499504', 'mean_counts-6800STDY12499504', 'log1p_mean_counts-6800STDY12499504', 'pct_dropout_by_counts-6800STDY12499504', 'total_counts-6800STDY12499504', 'log1p_total_counts-6800STDY12499504', 'n_cells_by_counts-6800STDY12499505', 'mean_counts-6800STDY12499505', 'log1p_mean_counts-6800STDY12499505', 'pct_dropout_by_counts-6800STDY12499505', 'total_counts-6800STDY12499505', 'log1p_total_counts-6800STDY12499505', 'n_cells_by_counts-6800STDY12499506', 'mean_counts-6800STDY12499506', 'log1p_mean_counts-6800STDY12499506', 'pct_dropout_by_counts-6800STDY12499506', 'total_counts-6800STDY12499506', 'log1p_total_counts-6800STDY12499506', 'n_cells_by_counts-6800STDY12499507', 'mean_counts-6800STDY12499507', 'log1p_mean_counts-6800STDY12499507', 'pct_dropout_by_counts-6800STDY12499507', 'total_counts-6800STDY12499507', 'log1p_total_counts-6800STDY12499507', 'n_cells_by_counts-6800STDY12499508', 'mean_counts-6800STDY12499508', 'log1p_mean_counts-6800STDY12499508', 'pct_dropout_by_counts-6800STDY12499508', 'total_counts-6800STDY12499508', 'log1p_total_counts-6800STDY12499508', 'n_cells_by_counts-6800STDY12499509', 'mean_counts-6800STDY12499509', 'log1p_mean_counts-6800STDY12499509', 'pct_dropout_by_counts-6800STDY12499509', 'total_counts-6800STDY12499509', 'log1p_total_counts-6800STDY12499509'
uns: '_scvi_manager_uuid', '_scvi_uuid', 'mod', 'sample_colors', 'spatial'
obsm: 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'spatial', 'stds_cell_abundance_w_sf'
# add 5% quantile, representing confident cell abundance, 'at least this amount is present',
# to adata.obs with nice names for plotting
adata_vis.obs[adata_vis.uns['mod']['factor_names']] = adata_vis.obsm['q05_cell_abundance_w_sf']
/home/conda/envs/scrna/lib/python3.9/site-packages/pandas/core/frame.py:3641: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self[k1] = value[k2]
当前Anndata对象中包括了不止一张切片的数据
adata_vis.obs[["sample", "donor_id"]].value_counts()
sample donor_id 6800STDY12499406 PD43824 3352 6800STDY12499508 PD47465 2831 6800STDY12499509 PD47465 2811 6800STDY12499502 PD45814 2700 6800STDY12499504 PD45815 2670 6800STDY12499503 PD45814 2661 6800STDY12499505 PD45815 2580 6800STDY12499507 PD45816 2145 6800STDY12499409 PD47512 2085 6800STDY12499411 PD47171 2048 6800STDY12499506 PD45816 2029 dtype: int64
list(adata_vis.obs.columns)
['in_tissue', 'array_row', 'array_col', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'donor_id', 'region', '_indices', '_scvi_batch', '_scvi_labels', 'ACKR+_EC', 'Activated KRT+ NK', 'Activated NK', 'Activated_B-AREG', 'Activated_B-RHOB', 'CCL5+_EC', 'CD', 'CD4+T_Act-CCL5', 'CD4+T_Act-CH25H', 'CD4+T_Act-CXCR4', 'CD4+T_Act-NR4A1', 'CD4+T_EM', 'CD4+T_INF-response', 'CD4+T_Naïve/CM', 'CD4+Treg', 'CD8+T_Act-XCL1/2', 'CD8+T_Cycling-G1/S', 'CD8+T_Cycling-G2/M', 'CD8+T_EFF.1', 'CD8+T_EFF.2', 'CD8+T_EM', 'CD8+T_EMRA', 'CD8+T_EX-CCL4L2', 'CD8+T_EX-IL10', 'CD8+T_Naïve/CM', 'CD8+T_RM-CXCL13', 'CD8+T_preEX-CXCR4', 'CD8+T_preEX-PMCH', 'CD principal cell', 'CDH19+ Fibro', 'CNN1+ Fibro', 'CRHBP+_EC', 'CRYAB+ Epi', 'CYGB+ Fibro', 'Classical Mono.1', 'Classical Mono.2', 'Classical Mono.3', 'Classical Mono.4', 'Collagen Fibro', 'Collagen_EC', 'Connecting tubule cell', 'Cycling', 'Cycling_EC', 'DCT cell', 'DNASE1L3+_EC', 'EMT', 'FABP4+_EC', 'FN1+ TAM', 'GPNMB+ TAM', 'IGF2+_EC', 'IGFBP3+_EC', 'ILC', 'INF-response_B', 'Inflam NK', 'LoH ATL', 'LoH TAL', 'Lymphatic_EC', 'MAIT', 'MHC-II Fibro', 'MHC-II TAM', 'MHCII', 'MMP Fibro', 'Mast cell', 'NFKBIZ+ Fibro', 'NK-CD65-bright', 'NK-CX3CR1-high', 'NK-FXYD2', 'NK-IFNG-high', 'NK-MTRNR2L12', 'NK-SPARC', 'Naïve_B.1', 'Naïve_B.2', 'Non-classical Mono', 'Non-switched-memory_B', 'PT', 'PT1/2', 'PT3', 'Pelvic urothelial cell', 'Pericyte', 'Plasma-IgA', 'Plasma-IgG', 'Plasma-cycling', 'Podocyte', 'Pro-infla. TAM', 'RGS+ TAM', 'SMC-like Fibro', 'SPP1+ TAM', 'Stress', 'Stress Fibro', 'Stress_B', 'Switched-memory_B', 'TR Mac.1', 'TR Mac.2', 'TR Mac.3', 'Type A intercalated cell', 'Type B intercalated cell', 'cDC1', 'cDC2', 'cir_NK-PTGDS', 'cir_NK-S100B', 'gdT_Vd1', 'gdT_Vd2', 'pDC']
# select one slide
from cell2location.utils import select_slide
slide = select_slide(adata_vis, '6800STDY12499411')
# plot in spatial coordinates
with mpl.rc_context({'axes.facecolor': 'black',
'figure.figsize': [4.5, 5]}):
sc.pl.spatial(slide, cmap='magma',
# show first 8 cell types
color=['EMT', 'PT'],
ncols=2, size=1.3,
img_key='hires',
# limit color scale at 99.2% quantile of cell abundance
vmin=0, vmax='p99.2'
)
# select one slide
from cell2location.utils import select_slide
slide = select_slide(adata_vis, '6800STDY12499506')
# plot in spatial coordinates
with mpl.rc_context({'axes.facecolor': 'black',
'figure.figsize': [4.5, 5]}):
sc.pl.spatial(slide, cmap='magma',
# show first 8 cell types
color=['TR Mac.1', 'TR Mac.2', 'TR Mac.3', 'MHC-II TAM'],
ncols=2, size=1.3,
img_key='hires',
# limit color scale at 99.2% quantile of cell abundance
vmin=0, vmax='p99.2'
)
# plot in spatial coordinates
with mpl.rc_context({'axes.facecolor': 'black',
'figure.figsize': [4.5, 5]}):
sc.pl.spatial(slide, cmap='magma',
# show first 8 cell types
color=['EMT', 'PT', 'TR Mac.2', 'LoH ATL'],
ncols=2, size=1.3,
img_key='hires',
# limit color scale at 99.2% quantile of cell abundance
vmin=0, vmax='p99.2'
)
# Now we use cell2location plotter that allows showing multiple cell types in one panel
from cell2location.plt import plot_spatial
# select up to 6 clusters
clust_labels = ['PT', 'EMT', 'LoH ATL', 'TR Mac.2']
clust_col = ['' + str(i) for i in clust_labels] # in case column names differ from labels
# slide = select_slide(adata_vis, '6800STDY12499507')
with mpl.rc_context({'figure.figsize': (15, 15)}):
fig = plot_spatial(
adata=slide,
# labels to show on a plot
color=clust_col, labels=clust_labels,
show_img=True,
# 'fast' (white background) or 'dark_background'
style='fast',
# limit color scale at 99.2% quantile of cell abundance
max_color_quantile=0.992,
# size of locations (adjust depending on figure size)
circle_diameter=6,
colorbar_position='right'
)
fig
# plt.savefig('figure_cell2location.pdf')